from __future__ import division

import math
import os
import sys
from itertools import islice
from operator import itemgetter, attrgetter

# The purpose of this program is to divide the 100KQueries query trace into N pieces based on percentages.
# Put the percentage numbers (WITHOUT the % symbol), in order, into a list; that list is the only input this program reads.
# e.g. [85,10,4,1]

# Percentage of the query trace assigned to each output file, in order.
# Output file i receives the next inputPercentageNumberList[i] percent of
# TOTAL_NUM_OF_QUERIES lines, read sequentially from the input file.
inputPercentageNumberList = [85,10,4,1]
TOTAL_NUM_OF_QUERIES = 100000
NUM_OF_OUTPUT_FILES_BE_GENERATED = len(inputPercentageNumberList)

# A single parenthesised argument prints identically under Python 2 and 3.
print("%d output files will be generated." % NUM_OF_OUTPUT_FILES_BE_GENERATED)
basePathOutputFilesPath = "/data3/obukai/the_new_trip_of_feature_generation/gov2ClearYourMindAndDoItAgain/probabilityDistributionEstimationByProf/fourSetOfQueriesByProf20130410/100KQueries"

inputFileName = "/data3/obukai/the_new_trip_of_feature_generation/gov2ClearYourMindAndDoItAgain/100KQueries"

# `with` guarantees both handles are closed even if a read or write fails.
with open(inputFileName, "r") as inputFileHandler:
    for i, percentage in enumerate(inputPercentageNumberList):
        # Output name pattern: <base>_<index>_<percentage>%
        currentOutputFileName = basePathOutputFilesPath + "_" + str(i) + "_" + str(percentage) + "%"
        # Number of lines this piece receives (truncated towards zero).
        TOTAL_NUM_OF_QUERIES_FOR_CURRENT_OUTPUT_FILE = int(TOTAL_NUM_OF_QUERIES * percentage / 100)
        print("%s %d" % (currentOutputFileName, TOTAL_NUM_OF_QUERIES_FOR_CURRENT_OUTPUT_FILE))

        with open(currentOutputFileName, "w") as currentOutputFileHandler:
            # islice pulls exactly the requested number of lines from the
            # shared input handle, so the next piece resumes where this one
            # stopped; it simply ends early if the input runs out of lines.
            currentOutputFileHandler.writelines(
                islice(inputFileHandler, TOTAL_NUM_OF_QUERIES_FOR_CURRENT_OUTPUT_FILE)
            )